library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.4.4
Bulk counts for both True bulk and Pseudo Bulk generated by either GATK or Monovar.
# Bulk SNP counts: one boxplot per caller (GATK, then Monovar) for the
# True Bulk and Pseudo Bulk rows of Count_Table.

# Label combining the patient and sample columns.
Count_Table$Label <- paste(Count_Table$Patient.., Count_Table$Sample)

# Keep only the two bulk sample types.
Bulk_counts <- Count_Table[
  Count_Table$Sample %in% c("True Bulk", "Pseudo Bulk"),
]

# Alternating "a"/"b" tag placed between patient and sample in the x label.
# Built from the row count rather than a hard-coded 18-element vector, so the
# assignment keeps working if the number of bulk rows changes.
# NOTE(review): presumably the tag fixes the order of the two bulk samples on
# the axis and relies on the rows alternating between them -- confirm against
# the input table.
Bulk_counts$Letter <- rep(c("a", "b"), length.out = nrow(Bulk_counts))

# Columns are referenced by bare name inside aes(); `Bulk_counts$...` bypasses
# ggplot2's data masking. The default x-axis title of the first plot changes
# accordingly (it is derived from the aes() expression).
ggplot(
  Bulk_counts,
  aes(x = paste(Patient.., Letter, Sample), y = Gatk...SNP.., fill = Patient..)
) +
  geom_boxplot()

ggplot(
  Bulk_counts,
  aes(x = paste(Patient.., Letter, Sample), y = Monovar...SNP.., fill = Patient..)
) +
  geom_boxplot() +
  xlab("")
Overlap between True and Pseudo Bulk.
# Jaccard index of the Pseudo Bulk rows of Overlap.with.TB: GATK boxes first,
# then Monovar boxes whose x label is prefixed with "monovar".
Overlap.with.TB$Label <- paste(
  Overlap.with.TB$Patient..,
  Overlap.with.TB$Sample
)

# Only the Pseudo Bulk rows are plotted.
TB_PB <- Overlap.with.TB[Overlap.with.TB$Sample %in% "Pseudo Bulk", ]

ggplot(TB_PB) +
  geom_boxplot(
    aes(x = Label, y = Gatk...Jaccard.Index, fill = Patient..)
  ) +
  geom_boxplot(
    aes(x = paste("monovar", Label), y = Monovar...Jaccard.Index, fill = Patient..)
  ) +
  ylim(0, 1) +
  ylab("Jaccard Index") +
  xlab("")
Genotype concordance match within the overlap between True and Pseudo Bulk
# Zyg.Percent (first layer) and Zyg.Percent.1 ("monovar"-prefixed layer) for
# the Pseudo Bulk rows of Overlap.with.TB.zyg.
Overlap.with.TB.zyg$Label <- paste(
  Overlap.with.TB.zyg$Patient..,
  Overlap.with.TB.zyg$Sample
)
TB_PB_zyg <- Overlap.with.TB.zyg[
  Overlap.with.TB.zyg$Sample %in% "Pseudo Bulk",
]

# Convert through character so that a factor column yields its printed values
# rather than its integer codes.
TB_PB_zyg[["Zyg.Percent"]] <- as.numeric(
  as.character(TB_PB_zyg[["Zyg.Percent"]])
)
TB_PB_zyg[["Zyg.Percent.1"]] <- as.numeric(
  as.character(TB_PB_zyg[["Zyg.Percent.1"]])
)

ggplot(TB_PB_zyg, aes(x = Label, y = Zyg.Percent, fill = Patient..)) +
  geom_boxplot() +
  ylim(0, 100) +
  ylab("Percent of Overlap") +
  xlab("") +
  geom_boxplot(
    aes(x = paste("monovar", Label), y = Zyg.Percent.1, fill = Patient..)
  )
Overlap with Gold Standard (1000 Genome Project Phase 3) as well as genotype concordance percentages of overlap.
# Bulk rows of GS_overlap: first the overlap percentage columns, then the
# Zyg columns, each with GATK boxes followed by "monovar"-prefixed boxes.
GS_overlap$Label <- paste(GS_overlap$Patient.., GS_overlap$Sample)
GS_bulk <- GS_overlap[
  GS_overlap$Sample %in% c("True Bulk", "Pseudo Bulk"),
]

ggplot(GS_bulk) +
  geom_boxplot(
    aes(x = Label, y = Gatk.....Overlap.1, fill = Patient..)
  ) +
  geom_boxplot(
    aes(x = paste("monovar", Label), y = Monovar.....Overlap.1, fill = Patient..)
  ) +
  ylim(0, 100)

ggplot(GS_bulk) +
  geom_boxplot(
    aes(x = Label, y = GATK...Zyg.., fill = Patient..)
  ) +
  geom_boxplot(
    aes(x = paste("monovar", Label), y = Monovar...Zyg.., fill = Patient..)
  ) +
  ylim(0, 100)
Counts from both True and Pseudo Bulk from NA19098 across all locations
# Total SNP counts per genomic location for patient NA19098: True Bulk boxes
# followed by Pseudo Bulk boxes (x label suffixed with "PB"), drawn once for
# the GATK rows and once for the Monovar rows of Location_counts.

# GATK rows.
Bulk_gakt <- Location_counts[Location_counts$Method %in% "GATK", ]
Bulk_gatk_098 <- Bulk_gakt[Bulk_gakt$Patient %in% "NA19098", ]
ggplot(
  Bulk_gatk_098,
  aes(x = Location, y = True.Bulk.Total..SNPs, fill = Location)
) +
  geom_boxplot() +
  geom_boxplot(
    aes(x = paste(Location, "PB"), y = Pseudo.Bulk.Total..SNPs, fill = Location)
  ) +
  ylab("Number of SNPs")

# Monovar rows.
Bulk_mon <- Location_counts[Location_counts$Method %in% "Monovar", ]
# Subset the Monovar table itself. The previous code indexed the GATK table
# with row positions computed on the Monovar table, so the "Monovar" plot
# was drawn from GATK rows.
Bulk_mon_098 <- Bulk_mon[Bulk_mon$Patient %in% "NA19098", ]
ggplot(
  Bulk_mon_098,
  aes(x = Location, y = True.Bulk.Total..SNPs, fill = Location)
) +
  geom_boxplot() +
  geom_boxplot(
    aes(x = paste(Location, "PB"), y = Pseudo.Bulk.Total..SNPs, fill = Location)
  ) +
  ylab("Number of SNPs")
Jaccard Index of Overlap between True and Pseudo Bulk across the genomic locations
# Jaccard.Index per location for the NA19098 rows of Location_counts; box
# outlines are coloured by Method, the fill is a single grey.
Bulk_098 <- Location_counts[Location_counts$Patient %in% "NA19098", ]

ggplot(
  Bulk_098,
  aes(x = Location, y = Jaccard.Index, fill = "grey", colour = Method)
) +
  geom_boxplot() +
  ylab("Jaccard Index") +
  scale_fill_manual(values = "grey") +
  scale_color_manual(values = c("black", "red")) +
  ylim(0, 1)
Overlap with Gold Standard and genotype concordance across all the locations in NA19098 (both bulk samples)
# NA19098 rows of Location_counts: the gs.of.total columns, then the
# gs.zyg.of.gs columns, True Bulk first and Pseudo Bulk (x suffixed with
# "PB") second; outlines coloured by Method.
Bulk_098 <- Location_counts[Location_counts$Patient %in% "NA19098", ]

ggplot(
  Bulk_098,
  aes(x = Location, y = True.Bulk...gs.of.total, fill = "grey", colour = Method)
) +
  geom_boxplot() +
  geom_boxplot(
    aes(
      x = paste(Location, "PB"),
      y = Pseudo.Bulk...gs.of.total,
      fill = "grey",
      colour = Method
    )
  ) +
  ylab("Percentage of Bulk SNPs") +
  scale_fill_manual(values = "grey") +
  scale_color_manual(values = c("black", "red"))

ggplot(
  Bulk_098,
  aes(x = Location, y = True.Bulk...gs.zyg.of.gs, fill = "grey", colour = Method)
) +
  geom_boxplot() +
  geom_boxplot(
    aes(
      x = paste(Location, "PB"),
      y = Pseudo.Bulk...gs.zyg.of.gs,
      fill = "grey",
      colour = Method
    )
  ) +
  ylab("Percentage of Bulk SNPs") +
  scale_fill_manual(values = "grey") +
  scale_color_manual(values = c("black", "red"))
SNP count from the Single Cell Data when varying the Percent cutoff
# SNP counts for the single-cell rows with cutoffs "SC - 0.6" to "SC - 1.0",
# one plot for the GATK column and one for the Monovar column.
Count_Table$Label <- paste(Count_Table$Patient.., Count_Table$Sample)
SC_counts <- Count_Table[
  Count_Table$Sample %in%
    c("SC - 0.6", "SC - 0.7", "SC - 0.8", "SC - 0.9", "SC - 1.0"),
]

ggplot(SC_counts, aes(x = Label, y = Gatk...SNP.., fill = Patient..)) +
  geom_boxplot() +
  ylab("Number of SNPs")

ggplot(SC_counts, aes(x = Label, y = Monovar...SNP.., fill = Patient..)) +
  geom_boxplot() +
  ylab("Number of SNPs")
Single Cell SNP call overlap with the True Bulk Sample (varying percent cutoff)
# Single-cell rows ("SC - 0.6" to "SC - 1.0") of TB_overlap: Jaccard index,
# overlap percentage and Zyg percentage, each with GATK boxes followed by
# "monovar"-prefixed boxes.
SC_TB <- TB_overlap[
  TB_overlap$Sample %in%
    c("SC - 0.6", "SC - 0.7", "SC - 0.8", "SC - 0.9", "SC - 1.0"),
]
SC_TB$Label <- paste(SC_TB$Patient.., SC_TB$Sample)

ggplot(SC_TB) +
  geom_boxplot(
    aes(x = Label, y = Gatk...Jaccard.Index, fill = Patient..)
  ) +
  geom_boxplot(
    aes(x = paste("monovar", Label), y = Monovar...Jaccard.Index, fill = Patient..)
  ) +
  ylab("Jaccard Index") +
  ylim(0, 1)

ggplot(SC_TB) +
  geom_boxplot(
    aes(x = Label, y = Gatk.....Overlap.1, fill = Patient..)
  ) +
  geom_boxplot(
    aes(x = paste("monovar", Label), y = Monovar.....Overlap.1, fill = Patient..)
  ) +
  ylab("Percentage of SNPs Identified\nin True Bulk Sample")

# Convert through character so that a factor column yields its printed values
# rather than its integer codes.
SC_TB[["Zyg.Percent"]] <- as.numeric(as.character(SC_TB[["Zyg.Percent"]]))
SC_TB[["Zyg.Percent.1"]] <- as.numeric(as.character(SC_TB[["Zyg.Percent.1"]]))

ggplot(SC_TB) +
  geom_boxplot(
    aes(x = Label, y = Zyg.Percent, fill = Patient..)
  ) +
  geom_boxplot(
    aes(x = paste("monovar", Label), y = Zyg.Percent.1, fill = Patient..)
  ) +
  ylab("Percent of Overlap\nWith Same genotype concordance") +
  ylim(0, 100)
Single cell SNP call overlap with Gold Standard dataset plus genotype concordance match (varying percent cutoff)
# Single-cell rows ("SC - 0.6" to "SC - 1.0") of GS_overlap: overlap
# percentage, then Zyg percentage; GATK boxes followed by "monovar"-prefixed
# boxes.
SC_gs <- GS_overlap[
  GS_overlap$Sample %in%
    c("SC - 0.6", "SC - 0.7", "SC - 0.8", "SC - 0.9", "SC - 1.0"),
]
SC_gs$Label <- paste(SC_gs$Patient.., SC_gs$Sample)

ggplot(SC_gs) +
  geom_boxplot(
    aes(x = Label, y = Gatk.....Overlap.1, fill = Patient..)
  ) +
  geom_boxplot(
    aes(x = paste("monovar", Label), y = Monovar.....Overlap.1, fill = Patient..)
  ) +
  ylim(0, 100)

ggplot(SC_gs) +
  geom_boxplot(
    aes(x = Label, y = GATK...Zyg.., fill = Patient..)
  ) +
  geom_boxplot(
    aes(x = paste("monovar", Label), y = Monovar...Zyg.., fill = Patient..)
  ) +
  ylim(0, 100)
Single Cell (percent cutoff) SNP counts across all the different locations
# Total SNP counts per location and cutoff for the NA19098 single-cell rows
# ("SC - 0.6" to "SC - 1.0") of Location_counts_sc, once for GATK and once
# for Monovar.
SC_location_counts <- Location_counts_sc[
  Location_counts_sc$Cutoff %in%
    c("SC - 0.6", "SC - 0.7", "SC - 0.8", "SC - 0.9", "SC - 1.0"),
]

# GATK rows.
SC_loc_gatk <- SC_location_counts[SC_location_counts$Method %in% "GATK", ]
SC_loc_gatk_098 <- SC_loc_gatk[SC_loc_gatk$Patient %in% "NA19098", ]
# Columns are referenced by bare name inside aes(), matching the Monovar plot
# below; `SC_loc_gatk_098$...` bypasses ggplot2's data masking. The default
# x-axis and legend titles change accordingly.
ggplot(
  SC_loc_gatk_098,
  aes(x = paste(Location, Cutoff), y = Total..SNPs, fill = Location)
) +
  geom_boxplot() +
  ylab("Number of SNPs")

# Monovar rows.
SC_loc_mon <- SC_location_counts[SC_location_counts$Method %in% "Monovar", ]
SC_loc_mon_098 <- SC_loc_mon[SC_loc_mon$Patient %in% "NA19098", ]
ggplot(
  SC_loc_mon_098,
  aes(x = paste(Location, Cutoff), y = Total..SNPs, fill = Location)
) +
  geom_boxplot() +
  ylab("Number of SNPs")
# Overlap metrics per location and cutoff for the NA19098 single-cell rows
# with cutoffs "SC - 0.6", "SC - 0.8" and "SC - 1.0"; box outlines are
# coloured by Method, the fill is a single grey.
# The subset is computed once and shared by all four plots (it used to be
# recomputed identically before the last two).
SC_location_counts <- Location_counts_sc[
  Location_counts_sc$Cutoff %in% c("SC - 0.6", "SC - 0.8", "SC - 1.0"),
]
SC_loc_098 <- SC_location_counts[SC_location_counts$Patient %in% "NA19098", ]

ggplot(
  SC_loc_098,
  aes(x = paste(Location, Cutoff), y = TB.Jaccard.Index, fill = "grey", colour = Method)
) +
  geom_boxplot() +
  ylab("Jaccard Index") +
  scale_fill_manual(values = "grey") +
  scale_color_manual(values = c("black", "red")) +
  ylim(0, 1)

ggplot(
  SC_loc_098,
  aes(
    x = paste(Location, Cutoff),
    y = X..TB.overlap.with.zygosity.match,
    fill = "grey",
    colour = Method
  )
) +
  geom_boxplot() +
  ylab("Percent of Overlaped\nSNPs with Matched genotype concordance") +
  scale_fill_manual(values = "grey") +
  scale_color_manual(values = c("black", "red"))

ggplot(
  SC_loc_098,
  aes(x = paste(Location, Cutoff), y = X..gs.of.total, fill = "grey", colour = Method)
) +
  geom_boxplot() +
  ylab("Percent of Total SNPs") +
  scale_fill_manual(values = "grey") +
  scale_color_manual(values = c("black", "red"))

ggplot(
  SC_loc_098,
  aes(x = paste(Location, Cutoff), y = X..gs.zyg.of.gs, fill = "grey", colour = Method)
) +
  geom_boxplot() +
  ylab("Percent of Overlaped\nSNPs with Matched genotype concordance") +
  scale_fill_manual(values = "grey") +
  scale_color_manual(values = c("black", "red"))
Schematic is drawn in Adobe Illustrator and not in R. The plots for mapped reads were drawn in R.
# Total reads and percentage of mapped reads per "<patient> : <sample>" label,
# drawn as boxplots with the individual points on top.
Read_counts$Label <- paste(Read_counts$Patient, Read_counts$Sample, sep = " : ")

# Fix the axis order: for each patient, True Bulk, Pseudo Bulk, then SC.
Read_counts$Label <- factor(
  Read_counts$Label,
  levels = paste(
    rep(c("NA19098", "NA19101", "NA19239"), each = 3),
    rep(c("True Bulk", "Pseudo Bulk", "SC"), times = 3),
    sep = " : "
  )
)

ggplot(Read_counts, aes(x = Label, y = Total.Reads)) +
  geom_boxplot(aes(fill = Patient)) +
  geom_point(aes(fill = Patient)) +
  ylab("Total Number of Reads")

ggplot(Read_counts, aes(x = Label, y = Percent.Mapped)) +
  geom_boxplot(aes(fill = Patient)) +
  geom_point(aes(fill = Patient)) +
  ylab("Percentage of Mapped Reads")
Venny (online) was used to compute the overlaps; they are not computed here.
Overlap with the Bulk samples and the two other databases: dbSNP146 and 1000 Genomes Project (phase 1)
# Overlap percentage of the bulk rows of DBSNP.overlap and V1000G_overlap:
# GATK boxes first, then Monovar boxes whose x label is prefixed with
# "monovar".

# --- DBSNP.overlap ---------------------------------------------------------
DBSNP.overlap$Label <- paste(DBSNP.overlap$Patient.., DBSNP.overlap$Sample)
DBSNP.bulk <- DBSNP.overlap[
  DBSNP.overlap$Sample %in% c("True Bulk", "Pseudo Bulk"),
]
# Ordered factor: True Bulk before Pseudo Bulk within each patient.
# (`TRUE` spelled out; `T` is an ordinary variable that can be reassigned.)
DBSNP.bulk$Label <- factor(
  DBSNP.bulk$Label,
  levels = c(
    "NA19098 True Bulk", "NA19098 Pseudo Bulk",
    "NA19101 True Bulk", "NA19101 Pseudo Bulk",
    "NA19239 True Bulk", "NA19239 Pseudo Bulk"
  ),
  ordered = TRUE
)
# Alternating "a"/"b" tag placed between patient and sample in the x label,
# built from the row count rather than a hard-coded 18-element vector.
# NOTE(review): presumably relies on the rows alternating between the two
# bulk samples -- confirm against the input table.
DBSNP.bulk$Letter <- rep(c("a", "b"), length.out = nrow(DBSNP.bulk))
# Columns are referenced by bare name inside aes(); `DBSNP.bulk$...` bypasses
# ggplot2's data masking. The default x-axis title changes accordingly.
ggplot(DBSNP.bulk) +
  geom_boxplot(
    aes(
      x = paste(Patient.., Letter, Sample),
      y = Gatk.....Overlap.1,
      fill = Patient..
    )
  ) +
  geom_boxplot(
    aes(
      x = paste("monovar", Patient.., Letter, Sample),
      y = Monovar.....Overlap.1,
      fill = Patient..
    )
  ) +
  ylim(0, 100)

# --- V1000G_overlap --------------------------------------------------------
V1000G_overlap$Label <- paste(V1000G_overlap$Patient.., V1000G_overlap$Sample)
V1000G_bulk <- V1000G_overlap[
  V1000G_overlap$Sample %in% c("True Bulk", "Pseudo Bulk"),
]
V1000G_bulk$Letter <- rep(c("a", "b"), length.out = nrow(V1000G_bulk))
ggplot(V1000G_bulk) +
  geom_boxplot(
    aes(
      x = paste(Patient.., Letter, Sample),
      y = Gatk.....Overlap.1,
      fill = Patient..
    )
  ) +
  geom_boxplot(
    aes(
      x = paste("monovar", Patient.., Letter, Sample),
      y = Monovar.....Overlap.1,
      fill = Patient..
    )
  ) +
  ylim(0, 100)
Analysis when varying the coverage requirement
# Total SNP counts against the coverage cutoff for the "dbSNP 146" rows of
# Coverage.analysis, once for GATK and once for Monovar.
Coverage.analysis$Label <- paste(
  Coverage.analysis$Patient, ":", Coverage.analysis$Cutoff
)
# Cutoff is used as a base-10 exponent; `actual` is plotted on a log10 x axis.
Coverage.analysis$actual <- 10^Coverage.analysis$Cutoff
# Reverse the level order of Sample. as.factor() leaves an existing factor
# untouched and converts a character column first; levels() of a character
# vector is NULL, which previously turned every Sample value into NA.
Coverage.analysis$Sample <- with(
  Coverage.analysis,
  factor(Sample, levels = rev(levels(as.factor(Sample))))
)

Coverage_db <- Coverage.analysis[Coverage.analysis$DB %in% "dbSNP 146", ]

# GATK rows.
Coverage_db_gatk <- Coverage_db[Coverage_db$Method %in% "GATK", ]
ggplot(
  Coverage_db_gatk,
  aes(
    x = actual, y = Total, col = Sample,
    group = paste(actual, Patient, Sample), fill = Patient
  )
) +
  geom_boxplot() +
  scale_color_manual(values = c("black", "red")) +
  # `sides` spelled out in full (was `side`, relying on partial matching).
  annotation_logticks(sides = "b") +
  theme(panel.grid.minor = element_blank()) +
  scale_x_log10() +
  xlab("Read Coverage") +
  ylab("Total Number of SNPs")

# Monovar rows. NOTE: despite its name, Coverage_db_gatk holds the Monovar
# rows from here on; the name is kept so the variables this script leaves
# behind are unchanged.
Coverage_db_gatk <- Coverage_db[Coverage_db$Method %in% "Monovar", ]
ggplot(
  Coverage_db_gatk,
  aes(
    x = actual, y = Total, col = Sample,
    group = paste(actual, Patient, Sample), fill = Patient
  )
) +
  geom_boxplot() +
  scale_color_manual(values = c("black", "red")) +
  annotation_logticks(sides = "b") +
  theme(panel.grid.minor = element_blank()) +
  scale_x_log10() +
  xlab("Read Coverage") +
  ylab("Total Number of SNPs")
Overlap across the different read coverage with dbSNP 146
# Percent * 100 against the coverage cutoff for the rows currently held in
# Coverage_db, once for GATK and once for Monovar.

# GATK rows.
Coverage_db_gatk <- Coverage_db[Coverage_db$Method %in% "GATK", ]
ggplot(
  Coverage_db_gatk,
  aes(
    x = actual, y = Percent * 100, col = Sample,
    group = paste(actual, Patient, Sample), fill = Patient
  )
) +
  geom_boxplot() +
  scale_color_manual(values = c("black", "red")) +
  # `sides` spelled out in full (was `side`, relying on partial matching).
  annotation_logticks(sides = "b") +
  theme(panel.grid.minor = element_blank()) +
  scale_x_log10() +
  xlab("Read Coverage") +
  ylab("Percent Overlap") +
  ylim(0, 100)

# Monovar rows. NOTE: despite its name, Coverage_db_gatk holds the Monovar
# rows from here on.
Coverage_db_gatk <- Coverage_db[Coverage_db$Method %in% "Monovar", ]
ggplot(
  Coverage_db_gatk,
  aes(
    x = actual, y = Percent * 100, col = Sample,
    group = paste(actual, Patient, Sample), fill = Patient
  )
) +
  geom_boxplot() +
  scale_color_manual(values = c("black", "red")) +
  annotation_logticks(sides = "b") +
  theme(panel.grid.minor = element_blank()) +
  scale_x_log10() +
  xlab("Read Coverage") +
  ylab("Percent Overlap") +
  ylim(0, 100)
Overlap across the different read coverage with Gold Standard
# Percent * 100 against the coverage cutoff for the "Gold Standard" rows of
# Coverage.analysis, once for GATK and once for Monovar.
Coverage_db <- Coverage.analysis[Coverage.analysis$DB %in% "Gold Standard", ]

# GATK rows.
Coverage_db_gatk <- Coverage_db[Coverage_db$Method %in% "GATK", ]
ggplot(
  Coverage_db_gatk,
  aes(
    x = actual, y = Percent * 100, col = Sample,
    group = paste(actual, Patient, Sample), fill = Patient
  )
) +
  geom_boxplot() +
  scale_color_manual(values = c("black", "red")) +
  # `sides` spelled out in full (was `side`, relying on partial matching).
  annotation_logticks(sides = "b") +
  theme(panel.grid.minor = element_blank()) +
  scale_x_log10() +
  xlab("Read Coverage") +
  ylab("Percent Overlap") +
  ylim(0, 100)

# Monovar rows. NOTE: despite its name, Coverage_db_gatk holds the Monovar
# rows from here on.
Coverage_db_gatk <- Coverage_db[Coverage_db$Method %in% "Monovar", ]
ggplot(
  Coverage_db_gatk,
  aes(
    x = actual, y = Percent * 100, col = Sample,
    group = paste(actual, Patient, Sample), fill = Patient
  )
) +
  geom_boxplot() +
  scale_color_manual(values = c("black", "red")) +
  annotation_logticks(sides = "b") +
  theme(panel.grid.minor = element_blank()) +
  scale_x_log10() +
  xlab("Read Coverage") +
  ylab("Percent Overlap") +
  ylim(0, 100)
SNP counts across all the quality score cut-offs
# Total SNP counts against the quality-score cutoff for the "dbSNP 146" rows
# of Phred.score.analysis, once for GATK and once for Monovar.
Phred.score.analysis$Label <- paste(
  Phred.score.analysis$Patient, ":", Phred.score.analysis$Cutoff
)
# Cutoff is used as a base-10 exponent; `actual` is plotted on a log10 x axis.
Phred.score.analysis$actual <- 10^Phred.score.analysis$Cutoff
# Reverse the level order of Sample. as.factor() leaves an existing factor
# untouched and converts a character column first; levels() of a character
# vector is NULL, which previously turned every Sample value into NA.
Phred.score.analysis$Sample <- with(
  Phred.score.analysis,
  factor(Sample, levels = rev(levels(as.factor(Sample))))
)

Phred_db <- Phred.score.analysis[Phred.score.analysis$DB %in% "dbSNP 146", ]

# GATK rows.
Phred_db_gatk <- Phred_db[Phred_db$Method %in% "GATK", ]
ggplot(
  Phred_db_gatk,
  aes(
    x = actual, y = Total, col = Sample,
    group = paste(actual, Patient, Sample), fill = Patient
  )
) +
  geom_boxplot() +
  scale_color_manual(values = c("black", "red")) +
  # `sides` spelled out in full (was `side`, relying on partial matching).
  annotation_logticks(sides = "b") +
  theme(panel.grid.minor = element_blank()) +
  scale_x_log10() +
  xlab("Read Phred") +
  ylab("Total Number of SNPs")

# Monovar rows. NOTE: despite its name, Phred_db_gatk holds the Monovar rows
# from here on; the name is kept so the variables this script leaves behind
# are unchanged.
Phred_db_gatk <- Phred_db[Phred_db$Method %in% "Monovar", ]
ggplot(
  Phred_db_gatk,
  aes(
    x = actual, y = Total, col = Sample,
    group = paste(actual, Patient, Sample), fill = Patient
  )
) +
  geom_boxplot() +
  scale_color_manual(values = c("black", "red")) +
  annotation_logticks(sides = "b") +
  theme(panel.grid.minor = element_blank()) +
  scale_x_log10() +
  xlab("Read Phred") +
  ylab("Total Number of SNPs")
Overlap across the different quality score with dbSNP 146
# Percent * 100 against the quality-score cutoff for the rows currently held
# in Phred_db, once for GATK and once for Monovar.

# GATK rows.
Phred_db_gatk <- Phred_db[Phred_db$Method %in% "GATK", ]
ggplot(
  Phred_db_gatk,
  aes(
    x = actual, y = Percent * 100, col = Sample,
    group = paste(actual, Patient, Sample), fill = Patient
  )
) +
  geom_boxplot() +
  scale_color_manual(values = c("black", "red")) +
  # `sides` spelled out in full (was `side`, relying on partial matching).
  annotation_logticks(sides = "b") +
  theme(panel.grid.minor = element_blank()) +
  scale_x_log10() +
  xlab("Read Phred") +
  ylab("Percent Overlap") +
  ylim(0, 100)

# Monovar rows. NOTE: despite its name, Phred_db_gatk holds the Monovar rows
# from here on.
Phred_db_gatk <- Phred_db[Phred_db$Method %in% "Monovar", ]
ggplot(
  Phred_db_gatk,
  aes(
    x = actual, y = Percent * 100, col = Sample,
    group = paste(actual, Patient, Sample), fill = Patient
  )
) +
  geom_boxplot() +
  scale_color_manual(values = c("black", "red")) +
  annotation_logticks(sides = "b") +
  theme(panel.grid.minor = element_blank()) +
  scale_x_log10() +
  xlab("Read Phred") +
  ylab("Percent Overlap") +
  ylim(0, 100)
Overlap across the different quality score with Gold Standard
# Percent * 100 against the quality-score cutoff for the "Gold Standard" rows
# of Phred.score.analysis, once for GATK and once for Monovar.
Phred_db <- Phred.score.analysis[
  Phred.score.analysis$DB %in% "Gold Standard",
]

# GATK rows.
Phred_db_gatk <- Phred_db[Phred_db$Method %in% "GATK", ]
ggplot(
  Phred_db_gatk,
  aes(
    x = actual, y = Percent * 100, col = Sample,
    group = paste(actual, Patient, Sample), fill = Patient
  )
) +
  geom_boxplot() +
  scale_color_manual(values = c("black", "red")) +
  # `sides` spelled out in full (was `side`, relying on partial matching).
  annotation_logticks(sides = "b") +
  theme(panel.grid.minor = element_blank()) +
  scale_x_log10() +
  xlab("Read Phred") +
  ylab("Percent Overlap") +
  ylim(0, 100)

# Monovar rows. NOTE: despite its name, Phred_db_gatk holds the Monovar rows
# from here on.
Phred_db_gatk <- Phred_db[Phred_db$Method %in% "Monovar", ]
ggplot(
  Phred_db_gatk,
  aes(
    x = actual, y = Percent * 100, col = Sample,
    group = paste(actual, Patient, Sample), fill = Patient
  )
) +
  geom_boxplot() +
  scale_color_manual(values = c("black", "red")) +
  annotation_logticks(sides = "b") +
  theme(panel.grid.minor = element_blank()) +
  scale_x_log10() +
  xlab("Read Phred") +
  ylab("Percent Overlap") +
  ylim(0, 100)
Overlap for NA19101 and NA19239 True Bulk and Pseudo Bulk across all locations as called by GATK
# Total SNP counts per location for the GATK rows of Location_counts, one
# plot for NA19101 and one for NA19239: True Bulk boxes followed by Pseudo
# Bulk boxes (x label suffixed with "PB").
Bulk_gakt <- Location_counts[Location_counts$Method %in% "GATK", ]

Bulk_gatk_101 <- Bulk_gakt[Bulk_gakt$Patient %in% "NA19101", ]
ggplot(
  Bulk_gatk_101,
  aes(x = Location, y = True.Bulk.Total..SNPs, fill = Location)
) +
  geom_boxplot() +
  geom_boxplot(
    aes(x = paste(Location, "PB"), y = Pseudo.Bulk.Total..SNPs, fill = Location)
  ) +
  ylab("Number of SNPs")

Bulk_gatk_239 <- Bulk_gakt[Bulk_gakt$Patient %in% "NA19239", ]
ggplot(
  Bulk_gatk_239,
  aes(x = Location, y = True.Bulk.Total..SNPs, fill = Location)
) +
  geom_boxplot() +
  geom_boxplot(
    aes(x = paste(Location, "PB"), y = Pseudo.Bulk.Total..SNPs, fill = Location)
  ) +
  ylab("Number of SNPs")
Overlap for NA19101 and NA19239 True Bulk and Pseudo Bulk across all locations as called by Monovar
# Total SNP counts per location for the Monovar rows of Location_counts, one
# plot for NA19101 and one for NA19239: True Bulk boxes followed by Pseudo
# Bulk boxes (x label suffixed with "PB").
Bulk_mon <- Location_counts[Location_counts$Method %in% "Monovar", ]

Bulk_mon_101 <- Bulk_mon[Bulk_mon$Patient %in% "NA19101", ]
ggplot(
  Bulk_mon_101,
  aes(x = Location, y = True.Bulk.Total..SNPs, fill = Location)
) +
  geom_boxplot() +
  geom_boxplot(
    aes(x = paste(Location, "PB"), y = Pseudo.Bulk.Total..SNPs, fill = Location)
  ) +
  ylab("Number of SNPs")

Bulk_mon_239 <- Bulk_mon[Bulk_mon$Patient %in% "NA19239", ]
ggplot(
  Bulk_mon_239,
  aes(x = Location, y = True.Bulk.Total..SNPs, fill = Location)
) +
  geom_boxplot() +
  geom_boxplot(
    aes(x = paste(Location, "PB"), y = Pseudo.Bulk.Total..SNPs, fill = Location)
  ) +
  ylab("Number of SNPs")
Overlap between the True Bulk and Pseudo Bulk across all genomic locations in NA19101 and NA19239
# Jaccard.Index per location for NA19101 and then NA19239; box outlines are
# coloured by Method, the fill is a single grey.
Bulk_101 <- Location_counts[Location_counts$Patient %in% "NA19101", ]
ggplot(
  Bulk_101,
  aes(x = Location, y = Jaccard.Index, fill = "grey", colour = Method)
) +
  geom_boxplot() +
  ylab("Jaccard Index") +
  scale_fill_manual(values = "grey") +
  scale_color_manual(values = c("black", "red")) +
  ylim(0, 1)

Bulk_239 <- Location_counts[Location_counts$Patient %in% "NA19239", ]
ggplot(
  Bulk_239,
  aes(x = Location, y = Jaccard.Index, fill = "grey", colour = Method)
) +
  geom_boxplot() +
  ylab("Jaccard Index") +
  scale_fill_manual(values = "grey") +
  scale_color_manual(values = c("black", "red")) +
  ylim(0, 1)
Overlap between all three patients across all location to dbSNP146
# dbSNP.overlap.of.total columns per location, one plot per patient: True
# Bulk boxes followed by Pseudo Bulk boxes (x label suffixed with "PB");
# outlines coloured by Method.

# NA19098
Bulk_098 <- Location_counts[Location_counts$Patient %in% "NA19098", ]
ggplot(
  Bulk_098,
  aes(
    x = Location, y = True.Bulk...dbSNP.overlap.of.total,
    fill = "grey", colour = Method
  )
) +
  geom_boxplot() +
  geom_boxplot(
    aes(
      x = paste(Location, "PB"), y = Pseudo.Bulk...dbSNP.overlap.of.total,
      fill = "grey", colour = Method
    )
  ) +
  ylab("Percentage of Location Total") +
  scale_fill_manual(values = "grey") +
  scale_color_manual(values = c("black", "red"))

# NA19101
Bulk_101 <- Location_counts[Location_counts$Patient %in% "NA19101", ]
ggplot(
  Bulk_101,
  aes(
    x = Location, y = True.Bulk...dbSNP.overlap.of.total,
    fill = "grey", colour = Method
  )
) +
  geom_boxplot() +
  geom_boxplot(
    aes(
      x = paste(Location, "PB"), y = Pseudo.Bulk...dbSNP.overlap.of.total,
      fill = "grey", colour = Method
    )
  ) +
  ylab("Percentage of Location Total") +
  scale_fill_manual(values = "grey") +
  scale_color_manual(values = c("black", "red"))

# NA19239
Bulk_239 <- Location_counts[Location_counts$Patient %in% "NA19239", ]
ggplot(
  Bulk_239,
  aes(
    x = Location, y = True.Bulk...dbSNP.overlap.of.total,
    fill = "grey", colour = Method
  )
) +
  geom_boxplot() +
  geom_boxplot(
    aes(
      x = paste(Location, "PB"), y = Pseudo.Bulk...dbSNP.overlap.of.total,
      fill = "grey", colour = Method
    )
  ) +
  ylab("Percentage of Location Total") +
  scale_fill_manual(values = "grey") +
  scale_color_manual(values = c("black", "red"))
Overlap with all three patients and the 1000 Genome Project (phase 1)
# 1000GP.overlap.of.total columns per location, one plot per patient: True
# Bulk boxes followed by Pseudo Bulk boxes (x label suffixed with "PB");
# outlines coloured by Method.

# NA19098
Bulk_098 <- Location_counts[Location_counts$Patient %in% "NA19098", ]
ggplot(
  Bulk_098,
  aes(
    x = Location, y = True.Bulk...1000GP.overlap.of.total,
    fill = "grey", colour = Method
  )
) +
  geom_boxplot() +
  geom_boxplot(
    aes(
      x = paste(Location, "PB"), y = Pseudo.Bulk...1000GP.overlap.of.total,
      fill = "grey", colour = Method
    )
  ) +
  ylab("Percentage of Location Total") +
  scale_fill_manual(values = "grey") +
  scale_color_manual(values = c("black", "red"))

# NA19101
Bulk_101 <- Location_counts[Location_counts$Patient %in% "NA19101", ]
ggplot(
  Bulk_101,
  aes(
    x = Location, y = True.Bulk...1000GP.overlap.of.total,
    fill = "grey", colour = Method
  )
) +
  geom_boxplot() +
  geom_boxplot(
    aes(
      x = paste(Location, "PB"), y = Pseudo.Bulk...1000GP.overlap.of.total,
      fill = "grey", colour = Method
    )
  ) +
  ylab("Percentage of Location Total") +
  scale_fill_manual(values = "grey") +
  scale_color_manual(values = c("black", "red"))

# NA19239
Bulk_239 <- Location_counts[Location_counts$Patient %in% "NA19239", ]
ggplot(
  Bulk_239,
  aes(
    x = Location, y = True.Bulk...1000GP.overlap.of.total,
    fill = "grey", colour = Method
  )
) +
  geom_boxplot() +
  geom_boxplot(
    aes(
      x = paste(Location, "PB"), y = Pseudo.Bulk...1000GP.overlap.of.total,
      fill = "grey", colour = Method
    )
  ) +
  ylab("Percentage of Location Total") +
  scale_fill_manual(values = "grey") +
  scale_color_manual(values = c("black", "red"))
Overlap of NA19101 and NA19239 with Gold Standard
# gs.of.total columns per location for NA19101 and then NA19239: True Bulk
# boxes followed by Pseudo Bulk boxes (x label suffixed with "PB"); outlines
# coloured by Method.
Bulk_101 <- Location_counts[Location_counts$Patient %in% "NA19101", ]
ggplot(
  Bulk_101,
  aes(x = Location, y = True.Bulk...gs.of.total, fill = "grey", colour = Method)
) +
  geom_boxplot() +
  geom_boxplot(
    aes(
      x = paste(Location, "PB"), y = Pseudo.Bulk...gs.of.total,
      fill = "grey", colour = Method
    )
  ) +
  ylab("Percentage of Location Total") +
  scale_fill_manual(values = "grey") +
  scale_color_manual(values = c("black", "red"))

Bulk_239 <- Location_counts[Location_counts$Patient %in% "NA19239", ]
ggplot(
  Bulk_239,
  aes(x = Location, y = True.Bulk...gs.of.total, fill = "grey", colour = Method)
) +
  geom_boxplot() +
  geom_boxplot(
    aes(
      x = paste(Location, "PB"), y = Pseudo.Bulk...gs.of.total,
      fill = "grey", colour = Method
    )
  ) +
  ylab("Percentage of Location Total") +
  scale_fill_manual(values = "grey") +
  scale_color_manual(values = c("black", "red"))
Overlap of NA19101 and NA19239 with Gold Standard genotype concordance
# gs.zyg.of.gs columns per location for NA19101 and then NA19239: True Bulk
# boxes followed by Pseudo Bulk boxes (x label suffixed with "PB"); outlines
# coloured by Method.
Bulk_101 <- Location_counts[Location_counts$Patient %in% "NA19101", ]
ggplot(
  Bulk_101,
  aes(x = Location, y = True.Bulk...gs.zyg.of.gs, fill = "grey", colour = Method)
) +
  geom_boxplot() +
  geom_boxplot(
    aes(
      x = paste(Location, "PB"), y = Pseudo.Bulk...gs.zyg.of.gs,
      fill = "grey", colour = Method
    )
  ) +
  ylab("Percentage of Gold Standard Overlap") +
  scale_fill_manual(values = "grey") +
  scale_color_manual(values = c("black", "red"))

Bulk_239 <- Location_counts[Location_counts$Patient %in% "NA19239", ]
ggplot(
  Bulk_239,
  aes(x = Location, y = True.Bulk...gs.zyg.of.gs, fill = "grey", colour = Method)
) +
  geom_boxplot() +
  geom_boxplot(
    aes(
      x = paste(Location, "PB"), y = Pseudo.Bulk...gs.zyg.of.gs,
      fill = "grey", colour = Method
    )
  ) +
  ylab("Percentage of Gold Standard Overlap") +
  scale_fill_manual(values = "grey") +
  scale_color_manual(values = c("black", "red"))
Read Depth Across Genomic Locations of SNPs called by GATK
# Read depth (column X3) per "V1 location" group for each patient's GATK
# coverage table; the fill legend is hidden and the y axis is limited to
# 0-135.
# guides(fill = "none") replaces guides(fill = FALSE), which ggplot2 has
# deprecated.
ggplot(
  NA19098_all_location_coverage_gatk,
  aes(x = paste(V1, location), y = X3, fill = V1)
) +
  geom_boxplot() +
  xlab("") +
  ylab("Read Depth") +
  guides(fill = "none") +
  ylim(0, 135)

ggplot(
  NA19101_all_location_coverage_gatk,
  aes(x = paste(V1, location), y = X3, fill = V1)
) +
  geom_boxplot() +
  xlab("") +
  ylab("Read Depth") +
  guides(fill = "none") +
  ylim(0, 135)

ggplot(
  NA19239_all_location_coverage_gatk,
  aes(x = paste(V1, location), y = X3, fill = V1)
) +
  geom_boxplot() +
  xlab("") +
  ylab("Read Depth") +
  guides(fill = "none") +
  ylim(0, 135)
Read Depth Across Genomic Locations of SNPs called by Monovar
# Read depth (column X3) per "V1 location" group for each patient's Monovar
# coverage table; the fill legend is hidden and the y axis is limited to
# 0-135.
# guides(fill = "none") replaces guides(fill = FALSE), which ggplot2 has
# deprecated.
ggplot(
  NA19098_all_location_coverage_monovar,
  aes(x = paste(V1, location), y = X3, fill = V1)
) +
  geom_boxplot() +
  xlab("") +
  ylab("Read Depth") +
  guides(fill = "none") +
  ylim(0, 135)

ggplot(
  NA19101_all_location_coverage_monovar,
  aes(x = paste(V1, location), y = X3, fill = V1)
) +
  geom_boxplot() +
  xlab("") +
  ylab("Read Depth") +
  guides(fill = "none") +
  ylim(0, 135)

ggplot(
  NA19239_all_location_coverage_monovar,
  aes(x = paste(V1, location), y = X3, fill = V1)
) +
  geom_boxplot() +
  xlab("") +
  ylab("Read Depth") +
  guides(fill = "none") +
  ylim(0, 135)
SNP counts between 0.9 and 1.0 genotype agreement criteria
# Total.count per "Patient Cutoff" group from Finer.cutoff.analyses, first for
# the GATK rows and then for the Monovar rows.
Gatk_counts <- Finer.cutoff.analyses[
  Finer.cutoff.analyses$Method %in% "GATK",
]
ggplot(
  Gatk_counts,
  aes(x = paste(Patient, Cutoff), y = Total.count, fill = Patient)
) +
  geom_boxplot() +
  xlab("") +
  ylab("Number of SNPs")

Monovar_counts <- Finer.cutoff.analyses[
  Finer.cutoff.analyses$Method %in% "Monovar",
]
ggplot(
  Monovar_counts,
  aes(x = paste(Patient, Cutoff), y = Total.count, fill = Patient)
) +
  geom_boxplot() +
  xlab("") +
  ylab("Number of SNPs")
Comparison with True Bulk and the SNPs identified between the 0.9 and 1.0 genotype agreement criteria
# TB columns of Finer.cutoff.analyses per "Method Patient Cutoff" group:
# TB.percent (x100), TB.Jaccard and TB.zyg.percent (x100).
ggplot(
  Finer.cutoff.analyses,
  aes(x = paste(Method, Patient, Cutoff), y = TB.percent * 100, fill = Patient)
) +
  geom_boxplot() +
  ylim(0, 100) +
  ylab("Percentage") +
  xlab("")

ggplot(
  Finer.cutoff.analyses,
  aes(x = paste(Method, Patient, Cutoff), y = TB.Jaccard, fill = Patient)
) +
  geom_boxplot() +
  ylim(0, 1) +
  ylab("Jaccard Index") +
  xlab("")

ggplot(
  Finer.cutoff.analyses,
  aes(x = paste(Method, Patient, Cutoff), y = TB.zyg.percent * 100, fill = Patient)
) +
  geom_boxplot() +
  ylim(0, 100) +
  ylab("Percentage") +
  xlab("")
Overlap between Single cell SNP calls (percent cutoff) and Pseudo Bulk
# Single-cell rows ("SC - 0.6" to "SC - 1.0") of Overlap.with.PB.zyg: Jaccard
# index, overlap percentage and Zyg percentage. In every plot the first layer
# uses x = Label and the second layer prefixes the label with "monovar".
Overlap.with.PB.zyg$Label <- paste(
  Overlap.with.PB.zyg$Patient..,
  Overlap.with.PB.zyg$Sample
)
SC_PB_zyg <- Overlap.with.PB.zyg[
  Overlap.with.PB.zyg$Sample %in%
    c("SC - 0.6", "SC - 0.7", "SC - 0.8", "SC - 0.9", "SC - 1.0"),
]

# Convert through character so that a factor column yields its printed values
# rather than its integer codes.
SC_PB_zyg[["Gatk...Jaccard.Index"]] <- as.numeric(
  as.character(SC_PB_zyg[["Gatk...Jaccard.Index"]])
)
SC_PB_zyg[["Gatk...Jaccard.Index.1"]] <- as.numeric(
  as.character(SC_PB_zyg[["Gatk...Jaccard.Index.1"]])
)

ggplot(SC_PB_zyg, aes(x = Label, y = Gatk...Jaccard.Index, fill = Patient..)) +
  geom_boxplot() +
  ylim(0, 1) +
  ylab("Jaccard Index") +
  xlab("") +
  geom_boxplot(
    aes(x = paste("monovar", Label), y = Gatk...Jaccard.Index.1, fill = Patient..)
  )

ggplot(SC_PB_zyg, aes(x = Label, y = Gatk.....Overlap.2, fill = Patient..)) +
  geom_boxplot() +
  ylim(0, 100) +
  ylab("Percentage of SNPs Identified\nin Pseudo Bulk Sample") +
  xlab("") +
  geom_boxplot(
    aes(x = paste("monovar", Label), y = Monovar.....Overlap.2, fill = Patient..)
  )

SC_PB_zyg[["Zyg.Percent"]] <- as.numeric(
  as.character(SC_PB_zyg[["Zyg.Percent"]])
)
SC_PB_zyg[["Zyg.Percent.1"]] <- as.numeric(
  as.character(SC_PB_zyg[["Zyg.Percent.1"]])
)

ggplot(SC_PB_zyg, aes(x = Label, y = Zyg.Percent, fill = Patient..)) +
  geom_boxplot() +
  ylim(0, 100) +
  ylab("Percent of Overlap\nWith Same genotype concordance") +
  xlab("") +
  geom_boxplot(
    aes(x = paste("monovar", Label), y = Zyg.Percent.1, fill = Patient..)
  )
Overlap with SNP calls between the 0.9 and 1.0 genotype agreement criteria and Pseudo Bulk
# PB columns of Finer.cutoff.analyses per "Method Patient Cutoff" group:
# PB.percent (x100), PB.Jaccard and PB.zyg.percent (x100).
ggplot(
  Finer.cutoff.analyses,
  aes(x = paste(Method, Patient, Cutoff), y = PB.percent * 100, fill = Patient)
) +
  geom_boxplot() +
  ylim(0, 100) +
  ylab("Percentage") +
  xlab("")

ggplot(
  Finer.cutoff.analyses,
  aes(x = paste(Method, Patient, Cutoff), y = PB.Jaccard, fill = Patient)
) +
  geom_boxplot() +
  ylim(0, 1) +
  ylab("Jaccard Index") +
  xlab("")

ggplot(
  Finer.cutoff.analyses,
  aes(x = paste(Method, Patient, Cutoff), y = PB.zyg.percent * 100, fill = Patient)
) +
  geom_boxplot() +
  ylim(0, 100) +
  ylab("Percentage") +
  xlab("")
Overlap between Single cell SNP calls (percent cutoff) and dbSNP 146
# Overlap percentage for the single-cell rows ("SC - 0.6" to "SC - 1.0") of
# DBSNP.overlap: GATK boxes followed by "monovar"-prefixed boxes.
SC_dbsnp <- DBSNP.overlap[
  DBSNP.overlap$Sample %in%
    c("SC - 0.6", "SC - 0.7", "SC - 0.8", "SC - 0.9", "SC - 1.0"),
]
SC_dbsnp$Label <- paste(SC_dbsnp$Patient.., SC_dbsnp$Sample)

ggplot(SC_dbsnp) +
  geom_boxplot(
    aes(x = Label, y = Gatk.....Overlap.1, fill = Patient..)
  ) +
  geom_boxplot(
    aes(x = paste("monovar", Label), y = Monovar.....Overlap.1, fill = Patient..)
  ) +
  ylab("Percentage of SNPs\nIdentified in dbSNP 146") +
  ylim(0, 100)
Overlap between Single cell SNP calls (percent cutoff) and 1000 Genomes Project (phase 1)
# Overlap percentage for the single-cell rows ("SC - 0.6" to "SC - 1.0") of
# V1000G_overlap: GATK boxes followed by "monovar"-prefixed boxes.
SC_v1000 <- V1000G_overlap[
  V1000G_overlap$Sample %in%
    c("SC - 0.6", "SC - 0.7", "SC - 0.8", "SC - 0.9", "SC - 1.0"),
]
SC_v1000$Label <- paste(SC_v1000$Patient.., SC_v1000$Sample)

ggplot(SC_v1000) +
  geom_boxplot(
    aes(x = Label, y = Gatk.....Overlap.1, fill = Patient..)
  ) +
  geom_boxplot(
    aes(x = paste("monovar", Label), y = Monovar.....Overlap.1, fill = Patient..)
  ) +
  ylab("Percentage of SNPs Identified in\n 1000 Genome Project (Phase 1)") +
  ylim(0, 100)
Overlap with SNP calls between the 0.9 and 1.0 genotype agreement criteria and dbSNP 146
# dbsnp.percent (x100) of Finer.cutoff.analyses per "Method Patient Cutoff"
# group.
ggplot(
  Finer.cutoff.analyses,
  aes(x = paste(Method, Patient, Cutoff), y = dbsnp.percent * 100, fill = Patient)
) +
  geom_boxplot() +
  ylim(0, 100) +
  ylab("Percentage") +
  xlab("")
Overlap with SNP calls between the 0.9 and 1.0 genotype agreement criteria and 1000 Genomes Project (phase 1)
# V1000.percent (x100) of Finer.cutoff.analyses per "Method Patient Cutoff"
# group.
ggplot(
  Finer.cutoff.analyses,
  aes(x = paste(Method, Patient, Cutoff), y = V1000.percent * 100, fill = Patient)
) +
  geom_boxplot() +
  ylim(0, 100) +
  ylab("Percentage") +
  xlab("")
Overlap with SNP calls between the 0.9 and 1.0 genotype agreement criteria and Gold Standard
# GS.percent (x100) of Finer.cutoff.analyses per "Method Patient Cutoff"
# group.
ggplot(
  Finer.cutoff.analyses,
  aes(x = paste(Method, Patient, Cutoff), y = GS.percent * 100, fill = Patient)
) +
  geom_boxplot() +
  ylim(0, 100) +
  ylab("Percentage") +
  xlab("")
Overlap with SNP calls between the 0.9 and 1.0 genotype agreement criteria and Gold Standard with genotype concordance
# GS.zyg.percent (x100) of Finer.cutoff.analyses per "Method Patient Cutoff"
# group.
ggplot(
  Finer.cutoff.analyses,
  aes(x = paste(Method, Patient, Cutoff), y = GS.zyg.percent * 100, fill = Patient)
) +
  geom_boxplot() +
  ylim(0, 100) +
  ylab("Percentage") +
  xlab("")
Number of SNPs across the cell number cutoff in all three patients
# SNP counts for the single-cell rows with cutoffs "SC - 10" to "SC - 90":
# the GATK column first, then the Monovar column (x label prefixed with "M").
Count_Table$Label <- paste(Count_Table$Patient.., Count_Table$Sample)
SC_counts <- Count_Table[
  Count_Table$Sample %in% paste("SC -", seq(10, 90, by = 10)),
]

ggplot(
  SC_counts,
  aes(x = paste(Patient.., Sample), y = Gatk...SNP.., fill = Patient..)
) +
  geom_boxplot() +
  ylab("Number of SNPs")

ggplot(SC_counts) +
  geom_boxplot(
    aes(x = paste("M", Patient.., Sample), y = Monovar...SNP.., fill = Patient..)
  ) +
  ylab("Number of SNPs")
Overlap with True Bulk as well as genotype concordance
# Single-cell rows ("SC - 10" to "SC - 90") of TB_overlap: Jaccard index,
# then Zyg percentage; GATK boxes followed by "monovar"-prefixed boxes.
SC_TB <- TB_overlap[
  TB_overlap$Sample %in% paste("SC -", seq(10, 90, by = 10)),
]
SC_TB$Label <- paste(SC_TB$Patient.., SC_TB$Sample)

ggplot(SC_TB) +
  geom_boxplot(
    aes(x = Label, y = Gatk...Jaccard.Index, fill = Patient..)
  ) +
  geom_boxplot(
    aes(x = paste("monovar", Label), y = Monovar...Jaccard.Index, fill = Patient..)
  ) +
  ylab("Jaccard Index") +
  ylim(0, 1)

# Convert through character so that a factor column yields its printed values
# rather than its integer codes.
SC_TB[["Zyg.Percent"]] <- as.numeric(as.character(SC_TB[["Zyg.Percent"]]))
SC_TB[["Zyg.Percent.1"]] <- as.numeric(as.character(SC_TB[["Zyg.Percent.1"]]))

ggplot(SC_TB) +
  geom_boxplot(
    aes(x = Label, y = Zyg.Percent, fill = Patient..)
  ) +
  geom_boxplot(
    aes(x = paste("monovar", Label), y = Zyg.Percent.1, fill = Patient..)
  ) +
  ylim(0, 100) +
  ylab("Percent of Overlapped SNPs")
Overlap with Pseudo Bulk as well as genotype concordance
# Single-cell rows ("SC - 10" to "SC - 90") of PB_overlap: Jaccard index,
# then Zyg percentage. In both plots the second layer prefixes the x label
# with "monovar".
SC_PB <- PB_overlap[
  PB_overlap$Sample %in% paste("SC -", seq(10, 90, by = 10)),
]
SC_PB$Label <- paste(SC_PB$Patient.., SC_PB$Sample)

# Convert through character so that a factor column yields its printed values
# rather than its integer codes.
SC_PB[["Gatk...Jaccard.Index"]] <- as.numeric(
  as.character(SC_PB[["Gatk...Jaccard.Index"]])
)
SC_PB[["Gatk...Jaccard.Index.1"]] <- as.numeric(
  as.character(SC_PB[["Gatk...Jaccard.Index.1"]])
)

ggplot(SC_PB) +
  geom_boxplot(
    aes(x = Label, y = Gatk...Jaccard.Index, fill = Patient..)
  ) +
  geom_boxplot(
    aes(x = paste("monovar", Label), y = Gatk...Jaccard.Index.1, fill = Patient..)
  ) +
  ylab("Jaccard Index") +
  ylim(0, 1)

SC_PB[["Zyg.Percent"]] <- as.numeric(as.character(SC_PB[["Zyg.Percent"]]))
SC_PB[["Zyg.Percent.1"]] <- as.numeric(as.character(SC_PB[["Zyg.Percent.1"]]))

ggplot(SC_PB) +
  geom_boxplot(
    aes(x = Label, y = Zyg.Percent, fill = Patient..)
  ) +
  geom_boxplot(
    aes(x = paste("monovar", Label), y = Zyg.Percent.1, fill = Patient..)
  ) +
  ylim(0, 100) +
  ylab("Percent of Overlapped SNPs")
Overlap with Single Cell (cell num cutoff) and dbSNP 146
# Overlap percentage for the single-cell rows ("SC - 10" to "SC - 90") of
# DBSNP.overlap: GATK boxes followed by "monovar"-prefixed boxes.
SC_dbsnp <- DBSNP.overlap[
  DBSNP.overlap$Sample %in% paste("SC -", seq(10, 90, by = 10)),
]
SC_dbsnp$Label <- paste(SC_dbsnp$Patient.., SC_dbsnp$Sample)

ggplot(SC_dbsnp) +
  geom_boxplot(
    aes(x = Label, y = Gatk.....Overlap.1, fill = Patient..)
  ) +
  geom_boxplot(
    aes(x = paste("monovar", Label), y = Monovar.....Overlap.1, fill = Patient..)
  ) +
  ylim(0, 100) +
  ylab("Percentage of SNPs Identified\nAlso in dbSNP146")
Overlap with Single Cell (cell num cutoff) and 1000 Genome Project (phase 1)
# Rows of V1000G_overlap for the cell-number cut-offs "SC - 10" ... "SC - 90".
SC_v1000 <- V1000G_overlap[
  V1000G_overlap$Sample %in% paste("SC -", seq(10, 90, by = 10)),
]
SC_v1000$Label <- paste(SC_v1000$Patient.., SC_v1000$Sample, sep = " ")

# Gatk overlap column at x = Label, Monovar overlap column at
# x = "monovar <Label>", on a fixed 0-100 y-axis.
ggplot(data = SC_v1000) +
  geom_boxplot(
    mapping = aes(x = Label, y = Gatk.....Overlap.1, fill = Patient..)
  ) +
  geom_boxplot(
    mapping = aes(
      x = paste("monovar", Label),
      y = Monovar.....Overlap.1,
      fill = Patient..
    )
  ) +
  ylim(0, 100) +
  labs(y = "Percentage of SNPs Identified\nAlso in 1000 Genome Project (phase 1)")
Overlap with Single Cell (cell num cutoff) and Gold Standard as well as genotype concordance
# Rows of GS_overlap for the cell-number cut-offs "SC - 10" ... "SC - 90".
SC_gs <- GS_overlap[
  GS_overlap$Sample %in% paste("SC -", seq(10, 90, by = 10)),
]
SC_gs$Label <- paste(SC_gs$Patient.., SC_gs$Sample, sep = " ")

# Plot 1: overlap percentage columns (Gatk at x = Label, Monovar at
# x = "monovar <Label>").
ggplot(data = SC_gs) +
  geom_boxplot(
    mapping = aes(x = Label, y = Gatk.....Overlap.1, fill = Patient..)
  ) +
  geom_boxplot(
    mapping = aes(
      x = paste("monovar", Label),
      y = Monovar.....Overlap.1,
      fill = Patient..
    )
  ) +
  ylim(0, 100) +
  labs(y = "Percentage of SNPs Identified\nAlso in Gold Standard")

# Plot 2: same layout for the GATK...Zyg.. / Monovar...Zyg.. columns.
ggplot(data = SC_gs) +
  geom_boxplot(
    mapping = aes(x = Label, y = GATK...Zyg.., fill = Patient..)
  ) +
  geom_boxplot(
    mapping = aes(
      x = paste("monovar", Label),
      y = Monovar...Zyg..,
      fill = Patient..
    )
  ) +
  ylim(0, 100) +
  labs(y = "Percentage of SNPs With Same\ngenotype concordance in Gold Standard")
Number of SNPs called by GATK across all locations under varying genotype agreement cut-offs in NA19101
# Keep the five genotype-agreement cut-offs 0.6, 0.7, 0.8, 0.9 and 1.0.
# The subset is stored under `SC_location_counts` (lower-case "l"), which is
# the name the following statements read. It was previously assigned to
# `SC_Location_counts`, so the filter never reached the plot and a stale
# `SC_location_counts` from the workspace was used instead.
SC_location_counts <- Location_counts_sc[
  Location_counts_sc$Cutoff %in%
    c("SC - 0.6", "SC - 0.7", "SC - 0.8", "SC - 0.9", "SC - 1.0"),
]

# Narrow to GATK rows, then to patient NA19101.
SC_loc_gatk <- SC_location_counts[SC_location_counts$Method %in% "GATK", ]
SC_loc_gatk_101 <- SC_loc_gatk[SC_loc_gatk$Patient %in% "NA19101", ]

# One box per "<Location> <Cutoff>" combination, coloured by Location.
ggplot(
  data = SC_loc_gatk_101,
  mapping = aes(x = paste(Location, Cutoff), y = Total..SNPs, fill = Location)
) +
  geom_boxplot() +
  labs(y = "Number of SNPs")
Number of SNPs called by Monovar across all locations under varying genotype agreement cut-offs in NA19101
# Narrow the already-filtered SC_location_counts (must exist in the
# workspace at this point) to Monovar rows, then to patient NA19101.
SC_loc_mon <- SC_location_counts[SC_location_counts$Method %in% "Monovar", ]
SC_loc_mon_101 <- SC_loc_mon[SC_loc_mon$Patient %in% "NA19101", ]

# One box per "<Location> <Cutoff>" combination, coloured by Location.
ggplot(
  data = SC_loc_mon_101,
  mapping = aes(x = paste(Location, Cutoff), y = Total..SNPs, fill = Location)
) +
  geom_boxplot() +
  labs(y = "Number of SNPs")
Number of SNPs called by GATK across all locations under varying genotype agreement cut-offs in NA19239
# Keep the five genotype-agreement cut-offs 0.6, 0.7, 0.8, 0.9 and 1.0.
# The subset is stored under `SC_location_counts` (lower-case "l"), which is
# the name the following statements read. It was previously assigned to
# `SC_Location_counts`, so the filter never reached the plot.
SC_location_counts <- Location_counts_sc[
  Location_counts_sc$Cutoff %in%
    c("SC - 0.6", "SC - 0.7", "SC - 0.8", "SC - 0.9", "SC - 1.0"),
]

# Narrow to GATK rows, then to patient NA19239.
SC_loc_gatk <- SC_location_counts[SC_location_counts$Method %in% "GATK", ]
SC_loc_gatk_239 <- SC_loc_gatk[SC_loc_gatk$Patient %in% "NA19239", ]

# One box per "<Location> <Cutoff>" combination, coloured by Location.
ggplot(
  data = SC_loc_gatk_239,
  mapping = aes(x = paste(Location, Cutoff), y = Total..SNPs, fill = Location)
) +
  geom_boxplot() +
  labs(y = "Number of SNPs")
Number of SNPs called by Monovar across all locations under varying genotype agreement cut-offs in NA19239
# Narrow the already-filtered SC_location_counts (must exist in the
# workspace at this point) to Monovar rows, then to patient NA19239.
SC_loc_mon <- SC_location_counts[SC_location_counts$Method %in% "Monovar", ]
SC_loc_mon_239 <- SC_loc_mon[SC_loc_mon$Patient %in% "NA19239", ]

# One box per "<Location> <Cutoff>" combination, coloured by Location.
ggplot(
  data = SC_loc_mon_239,
  mapping = aes(x = paste(Location, Cutoff), y = Total..SNPs, fill = Location)
) +
  geom_boxplot() +
  labs(y = "Number of SNPs")
Overlap between True Bulk and Single cell (percent cutoff) in NA19101 and NA19239
# Restrict to the three cut-offs 0.6, 0.8 and 1.0.
SC_location_counts <- Location_counts_sc[
  Location_counts_sc$Cutoff %in% c("SC - 0.6", "SC - 0.8", "SC - 1.0"),
]

# NA19101: TB.Jaccard.Index per "<Location> <Cutoff>". The fill is a
# constant mapped inside aes() and resolved to "grey" by the manual fill
# scale; the outline colour distinguishes Method (black / red).
SC_loc_101 <- SC_location_counts[SC_location_counts$Patient %in% "NA19101", ]
ggplot(
  data = SC_loc_101,
  mapping = aes(
    x = paste(Location, Cutoff), y = TB.Jaccard.Index,
    fill = c("grey"), colour = Method
  )
) +
  geom_boxplot() +
  labs(y = "Jaccard Index") +
  scale_fill_manual(values = "grey") +
  scale_colour_manual(values = c("black", "red")) +
  ylim(0, 1)

# NA19239: identical plot.
SC_loc_239 <- SC_location_counts[SC_location_counts$Patient %in% "NA19239", ]
ggplot(
  data = SC_loc_239,
  mapping = aes(
    x = paste(Location, Cutoff), y = TB.Jaccard.Index,
    fill = c("grey"), colour = Method
  )
) +
  geom_boxplot() +
  labs(y = "Jaccard Index") +
  scale_fill_manual(values = "grey") +
  scale_colour_manual(values = c("black", "red")) +
  ylim(0, 1)
Overlap between True Bulk and Single cell (percent cutoff) with genotype concordance in NA19101 and NA19239
# Restrict to the three cut-offs 0.6, 0.8 and 1.0.
SC_location_counts <- Location_counts_sc[
  Location_counts_sc$Cutoff %in% c("SC - 0.6", "SC - 0.8", "SC - 1.0"),
]

# NA19101: X..TB.overlap.with.zygosity.match per "<Location> <Cutoff>".
# The fill is a constant mapped inside aes() and resolved to "grey" by the
# manual fill scale; the outline colour distinguishes Method (black / red).
SC_loc_101 <- SC_location_counts[SC_location_counts$Patient %in% "NA19101", ]
ggplot(
  data = SC_loc_101,
  mapping = aes(
    x = paste(Location, Cutoff), y = X..TB.overlap.with.zygosity.match,
    fill = c("grey"), colour = Method
  )
) +
  geom_boxplot() +
  labs(y = "Percent of Overlaped\nSNPs with Matched genotype concordance") +
  scale_fill_manual(values = "grey") +
  scale_colour_manual(values = c("black", "red"))

# NA19239: the same zygosity-match plot. The TB.Jaccard.Index plot for this
# patient is produced in the preceding Jaccard section and is not repeated
# here, so both patients get exactly one plot in this section.
SC_loc_239 <- SC_location_counts[SC_location_counts$Patient %in% "NA19239", ]
ggplot(
  data = SC_loc_239,
  mapping = aes(
    x = paste(Location, Cutoff), y = X..TB.overlap.with.zygosity.match,
    fill = c("grey"), colour = Method
  )
) +
  geom_boxplot() +
  labs(y = "Percent of Overlaped\nSNPs with Matched genotype concordance") +
  scale_fill_manual(values = "grey") +
  scale_colour_manual(values = c("black", "red"))
Overlap of all three patients Single cell (percent cutoff) and the corresponding Pseudo Bulk
# Restrict to the three cut-offs 0.6, 0.8 and 1.0.
SC_location_counts <- Location_counts_sc[
  Location_counts_sc$Cutoff %in% c("SC - 0.6", "SC - 0.8", "SC - 1.0"),
]

# One PB.Jaccard.Index plot per patient. The fill is a constant mapped
# inside aes() and resolved to "grey" by the manual fill scale; the outline
# colour distinguishes Method (black / red).

# NA19098
SC_loc_098 <- SC_location_counts[SC_location_counts$Patient %in% "NA19098", ]
ggplot(
  data = SC_loc_098,
  mapping = aes(
    x = paste(Location, Cutoff), y = PB.Jaccard.Index,
    fill = c("grey"), colour = Method
  )
) +
  geom_boxplot() +
  labs(y = "Jaccard Index") +
  scale_fill_manual(values = "grey") +
  scale_colour_manual(values = c("black", "red")) +
  ylim(0, 1)

# NA19101
SC_loc_101 <- SC_location_counts[SC_location_counts$Patient %in% "NA19101", ]
ggplot(
  data = SC_loc_101,
  mapping = aes(
    x = paste(Location, Cutoff), y = PB.Jaccard.Index,
    fill = c("grey"), colour = Method
  )
) +
  geom_boxplot() +
  labs(y = "Jaccard Index") +
  scale_fill_manual(values = "grey") +
  scale_colour_manual(values = c("black", "red")) +
  ylim(0, 1)

# NA19239
SC_loc_239 <- SC_location_counts[SC_location_counts$Patient %in% "NA19239", ]
ggplot(
  data = SC_loc_239,
  mapping = aes(
    x = paste(Location, Cutoff), y = PB.Jaccard.Index,
    fill = c("grey"), colour = Method
  )
) +
  geom_boxplot() +
  labs(y = "Jaccard Index") +
  scale_fill_manual(values = "grey") +
  scale_colour_manual(values = c("black", "red")) +
  ylim(0, 1)
Overlap of all three patients Single cell (percent cutoff) and the corresponding Pseudo Bulk with genotype agreement
# Restrict to the three cut-offs 0.6, 0.8 and 1.0.
SC_location_counts <- Location_counts_sc[
  Location_counts_sc$Cutoff %in% c("SC - 0.6", "SC - 0.8", "SC - 1.0"),
]

# One X..PB.overlap.with.zygosity.match plot per patient. The fill is a
# constant mapped inside aes() and resolved to "grey" by the manual fill
# scale; the outline colour distinguishes Method (black / red).
# No y-axis limits are set in this section.

# NA19098
SC_loc_098 <- SC_location_counts[SC_location_counts$Patient %in% "NA19098", ]
ggplot(
  data = SC_loc_098,
  mapping = aes(
    x = paste(Location, Cutoff), y = X..PB.overlap.with.zygosity.match,
    fill = c("grey"), colour = Method
  )
) +
  geom_boxplot() +
  labs(y = "Percent of Overlaped\nSNPs with Matched genotype concordance") +
  scale_fill_manual(values = "grey") +
  scale_colour_manual(values = c("black", "red"))

# NA19101
SC_loc_101 <- SC_location_counts[SC_location_counts$Patient %in% "NA19101", ]
ggplot(
  data = SC_loc_101,
  mapping = aes(
    x = paste(Location, Cutoff), y = X..PB.overlap.with.zygosity.match,
    fill = c("grey"), colour = Method
  )
) +
  geom_boxplot() +
  labs(y = "Percent of Overlaped\nSNPs with Matched genotype concordance") +
  scale_fill_manual(values = "grey") +
  scale_colour_manual(values = c("black", "red"))

# NA19239
SC_loc_239 <- SC_location_counts[SC_location_counts$Patient %in% "NA19239", ]
ggplot(
  data = SC_loc_239,
  mapping = aes(
    x = paste(Location, Cutoff), y = X..PB.overlap.with.zygosity.match,
    fill = c("grey"), colour = Method
  )
) +
  geom_boxplot() +
  labs(y = "Percent of Overlaped\nSNPs with Matched genotype concordance") +
  scale_fill_manual(values = "grey") +
  scale_colour_manual(values = c("black", "red"))
Overlap of all three patients with dbSNP 146
# Restrict Location_counts_sc to the three cut-offs 0.6, 0.8 and 1.0.
# Note: this reuses (overwrites) the name SC_dbsnp.
SC_dbsnp <- Location_counts_sc[
  Location_counts_sc$Cutoff %in% c("SC - 0.6", "SC - 0.8", "SC - 1.0"),
]

# One X..dbSNP.overlap.of.total plot per patient on a fixed 0-100 y-axis.
# The constant fill mapped in aes() is resolved to "grey" by the manual fill
# scale; the outline colour distinguishes Method (black / red).

# NA19098
SC_dbsnp_098 <- SC_dbsnp[SC_dbsnp$Patient %in% "NA19098", ]
ggplot(
  data = SC_dbsnp_098,
  mapping = aes(
    x = paste(Location, Cutoff), y = X..dbSNP.overlap.of.total,
    fill = "grey", colour = Method
  )
) +
  geom_boxplot() +
  labs(y = "Percentage of Overlaped \nSNPs with dbSNP 146") +
  scale_fill_manual(values = "grey") +
  scale_colour_manual(values = c("black", "red")) +
  ylim(0, 100)

# NA19101
SC_dbsnp_101 <- SC_dbsnp[SC_dbsnp$Patient %in% "NA19101", ]
ggplot(
  data = SC_dbsnp_101,
  mapping = aes(
    x = paste(Location, Cutoff), y = X..dbSNP.overlap.of.total,
    fill = "grey", colour = Method
  )
) +
  geom_boxplot() +
  labs(y = "Percentage of Overlaped \nSNPs with dbSNP 146") +
  scale_fill_manual(values = "grey") +
  scale_colour_manual(values = c("black", "red")) +
  ylim(0, 100)

# NA19239
SC_dbsnp_239 <- SC_dbsnp[SC_dbsnp$Patient %in% "NA19239", ]
ggplot(
  data = SC_dbsnp_239,
  mapping = aes(
    x = paste(Location, Cutoff), y = X..dbSNP.overlap.of.total,
    fill = "grey", colour = Method
  )
) +
  geom_boxplot() +
  labs(y = "Percentage of Overlaped \nSNPs with dbSNP 146") +
  scale_fill_manual(values = "grey") +
  scale_colour_manual(values = c("black", "red")) +
  ylim(0, 100)
Overlap of all three patients with 1000 Genome Project (phase 1)
# Restrict Location_counts_sc to the three cut-offs 0.6, 0.8 and 1.0.
# The SC_dbsnp* variable names are kept as they are, even though the
# column plotted here is X..1000GP.overlap.of.total.
SC_dbsnp <- Location_counts_sc[
  Location_counts_sc$Cutoff %in% c("SC - 0.6", "SC - 0.8", "SC - 1.0"),
]

# One plot per patient on a fixed 0-100 y-axis. The constant fill mapped in
# aes() is resolved to "grey" by the manual fill scale; the outline colour
# distinguishes Method (black / red).

# NA19098
SC_dbsnp_098 <- SC_dbsnp[SC_dbsnp$Patient %in% "NA19098", ]
ggplot(
  data = SC_dbsnp_098,
  mapping = aes(
    x = paste(Location, Cutoff), y = X..1000GP.overlap.of.total,
    fill = "grey", colour = Method
  )
) +
  geom_boxplot() +
  labs(y = "Percentage of Overlapped SNPs\nWith 1000 Genome Project (phase 1)") +
  scale_fill_manual(values = "grey") +
  scale_colour_manual(values = c("black", "red")) +
  ylim(0, 100)

# NA19101
SC_dbsnp_101 <- SC_dbsnp[SC_dbsnp$Patient %in% "NA19101", ]
ggplot(
  data = SC_dbsnp_101,
  mapping = aes(
    x = paste(Location, Cutoff), y = X..1000GP.overlap.of.total,
    fill = "grey", colour = Method
  )
) +
  geom_boxplot() +
  labs(y = "Percentage of Overlapped SNPs\nWith 1000 Genome Project (phase 1)") +
  scale_fill_manual(values = "grey") +
  scale_colour_manual(values = c("black", "red")) +
  ylim(0, 100)

# NA19239
SC_dbsnp_239 <- SC_dbsnp[SC_dbsnp$Patient %in% "NA19239", ]
ggplot(
  data = SC_dbsnp_239,
  mapping = aes(
    x = paste(Location, Cutoff), y = X..1000GP.overlap.of.total,
    fill = "grey", colour = Method
  )
) +
  geom_boxplot() +
  labs(y = "Percentage of Overlapped SNPs\nWith 1000 Genome Project (phase 1)") +
  scale_fill_manual(values = "grey") +
  scale_colour_manual(values = c("black", "red")) +
  ylim(0, 100)
Overlap between Single cell SNPs (percent cutoff) and the Gold Standard in NA19101 and NA19239
# Restrict to the three cut-offs 0.6, 0.8 and 1.0.
SC_location_counts <- Location_counts_sc[
  Location_counts_sc$Cutoff %in% c("SC - 0.6", "SC - 0.8", "SC - 1.0"),
]

# X..gs.of.total per "<Location> <Cutoff>" for two patients. The fill is a
# constant mapped inside aes() and resolved to "grey" by the manual fill
# scale; the outline colour distinguishes Method (black / red).
# No y-axis limits are set in this section.

# NA19101
SC_loc_101 <- SC_location_counts[SC_location_counts$Patient %in% "NA19101", ]
ggplot(
  data = SC_loc_101,
  mapping = aes(
    x = paste(Location, Cutoff), y = X..gs.of.total,
    fill = c("grey"), colour = Method
  )
) +
  geom_boxplot() +
  labs(y = "Percent of Total SNPs") +
  scale_fill_manual(values = "grey") +
  scale_colour_manual(values = c("black", "red"))

# NA19239
SC_loc_239 <- SC_location_counts[SC_location_counts$Patient %in% "NA19239", ]
ggplot(
  data = SC_loc_239,
  mapping = aes(
    x = paste(Location, Cutoff), y = X..gs.of.total,
    fill = c("grey"), colour = Method
  )
) +
  geom_boxplot() +
  labs(y = "Percent of Total SNPs") +
  scale_fill_manual(values = "grey") +
  scale_colour_manual(values = c("black", "red"))
Overlap between Single cell SNPs (percent cutoff) and the Gold Standard with genotype concordance in NA19101 and NA19239
# Restrict to the three cut-offs 0.6, 0.8 and 1.0.
SC_location_counts <- Location_counts_sc[
  Location_counts_sc$Cutoff %in% c("SC - 0.6", "SC - 0.8", "SC - 1.0"),
]

# X..gs.zyg.of.gs per "<Location> <Cutoff>" for two patients. The fill is a
# constant mapped inside aes() and resolved to "grey" by the manual fill
# scale; the outline colour distinguishes Method (black / red).
# No y-axis limits are set in this section.

# NA19101
SC_loc_101 <- SC_location_counts[SC_location_counts$Patient %in% "NA19101", ]
ggplot(
  data = SC_loc_101,
  mapping = aes(
    x = paste(Location, Cutoff), y = X..gs.zyg.of.gs,
    fill = c("grey"), colour = Method
  )
) +
  geom_boxplot() +
  labs(y = "Percent of Overlaped\nSNPs with Matched genotype concordance") +
  scale_fill_manual(values = "grey") +
  scale_colour_manual(values = c("black", "red"))

# NA19239
SC_loc_239 <- SC_location_counts[SC_location_counts$Patient %in% "NA19239", ]
ggplot(
  data = SC_loc_239,
  mapping = aes(
    x = paste(Location, Cutoff), y = X..gs.zyg.of.gs,
    fill = c("grey"), colour = Method
  )
) +
  geom_boxplot() +
  labs(y = "Percent of Overlaped\nSNPs with Matched genotype concordance") +
  scale_fill_manual(values = "grey") +
  scale_colour_manual(values = c("black", "red"))
SNP Counts called by GATK in all three patients across all genomic locations while varying cell number criteria
# Rows of SC.Cell.Num.Location for the cell-number cut-offs
# "SC - 10", "SC - 20", ..., "SC - 90".
SC_location_counts <- SC.Cell.Num.Location[
  SC.Cell.Num.Location$Cutoff %in% paste("SC -", seq(10, 90, by = 10)),
]

# For each patient: keep that patient's rows, keep the GATK rows, then draw
# Total..SNPs per "<Location> <Cutoff>" with boxes coloured by Location.

# NA19098
SC_loc_098 <- SC_location_counts[SC_location_counts$Patient %in% "NA19098", ]
SC_locat_098_gatk <- SC_loc_098[SC_loc_098$Method %in% "GATK", ]
ggplot(
  data = SC_locat_098_gatk,
  mapping = aes(x = paste(Location, Cutoff), y = Total..SNPs, fill = Location)
) +
  geom_boxplot() +
  labs(y = "Number of SNPs")

# NA19101
SC_loc_101 <- SC_location_counts[SC_location_counts$Patient %in% "NA19101", ]
SC_locat_101_gatk <- SC_loc_101[SC_loc_101$Method %in% "GATK", ]
ggplot(
  data = SC_locat_101_gatk,
  mapping = aes(x = paste(Location, Cutoff), y = Total..SNPs, fill = Location)
) +
  geom_boxplot() +
  labs(y = "Number of SNPs")

# NA19239
SC_loc_239 <- SC_location_counts[SC_location_counts$Patient %in% "NA19239", ]
SC_locat_239_gatk <- SC_loc_239[SC_loc_239$Method %in% "GATK", ]
ggplot(
  data = SC_locat_239_gatk,
  mapping = aes(x = paste(Location, Cutoff), y = Total..SNPs, fill = Location)
) +
  geom_boxplot() +
  labs(y = "Number of SNPs")
SNP Counts called by Monovar in all three patients across all genomic locations while varying cell number criteria
# Rows of SC.Cell.Num.Location for the cell-number cut-offs
# "SC - 10", "SC - 20", ..., "SC - 90".
SC_location_counts <- SC.Cell.Num.Location[
  SC.Cell.Num.Location$Cutoff %in% paste("SC -", seq(10, 90, by = 10)),
]

# For each patient: keep that patient's rows, keep the Monovar rows, then
# draw Total..SNPs per "<Location> <Cutoff>" with boxes coloured by Location.

# NA19098
SC_loc_098 <- SC_location_counts[SC_location_counts$Patient %in% "NA19098", ]
SC_locat_098_mon <- SC_loc_098[SC_loc_098$Method %in% "Monovar", ]
ggplot(
  data = SC_locat_098_mon,
  mapping = aes(x = paste(Location, Cutoff), y = Total..SNPs, fill = Location)
) +
  geom_boxplot() +
  labs(y = "Number of SNPs")

# NA19101
SC_loc_101 <- SC_location_counts[SC_location_counts$Patient %in% "NA19101", ]
SC_locat_101_mon <- SC_loc_101[SC_loc_101$Method %in% "Monovar", ]
ggplot(
  data = SC_locat_101_mon,
  mapping = aes(x = paste(Location, Cutoff), y = Total..SNPs, fill = Location)
) +
  geom_boxplot() +
  labs(y = "Number of SNPs")

# NA19239
SC_loc_239 <- SC_location_counts[SC_location_counts$Patient %in% "NA19239", ]
SC_locat_239_mon <- SC_loc_239[SC_loc_239$Method %in% "Monovar", ]
ggplot(
  data = SC_locat_239_mon,
  mapping = aes(x = paste(Location, Cutoff), y = Total..SNPs, fill = Location)
) +
  geom_boxplot() +
  labs(y = "Number of SNPs")
Overlap of Single cell (cell num cutoff) and True Bulk in all three patients
# Restrict to the three cell-number cut-offs 10, 50 and 90.
SC_location_counts <- SC.Cell.Num.Location[
  SC.Cell.Num.Location$Cutoff %in% c("SC - 10", "SC - 50", "SC - 90"),
]

# One TB.Jaccard.Index plot per patient on a fixed 0-1 y-axis. The constant
# fill mapped in aes() is resolved to "grey" by the manual fill scale; the
# outline colour distinguishes Method (black / red).

# NA19098
SC_loc_098 <- SC_location_counts[SC_location_counts$Patient %in% "NA19098", ]
ggplot(
  data = SC_loc_098,
  mapping = aes(
    x = paste(Location, Cutoff), y = TB.Jaccard.Index,
    fill = "grey", colour = Method
  )
) +
  geom_boxplot() +
  labs(y = "Jaccard Index") +
  scale_fill_manual(values = "grey") +
  scale_colour_manual(values = c("black", "red")) +
  ylim(0, 1)

# NA19101
SC_loc_101 <- SC_location_counts[SC_location_counts$Patient %in% "NA19101", ]
ggplot(
  data = SC_loc_101,
  mapping = aes(
    x = paste(Location, Cutoff), y = TB.Jaccard.Index,
    fill = "grey", colour = Method
  )
) +
  geom_boxplot() +
  labs(y = "Jaccard Index") +
  scale_fill_manual(values = "grey") +
  scale_colour_manual(values = c("black", "red")) +
  ylim(0, 1)

# NA19239
SC_loc_239 <- SC_location_counts[SC_location_counts$Patient %in% "NA19239", ]
ggplot(
  data = SC_loc_239,
  mapping = aes(
    x = paste(Location, Cutoff), y = TB.Jaccard.Index,
    fill = "grey", colour = Method
  )
) +
  geom_boxplot() +
  labs(y = "Jaccard Index") +
  scale_fill_manual(values = "grey") +
  scale_colour_manual(values = c("black", "red")) +
  ylim(0, 1)
Overlap of Single cell (cell num cutoff) and True Bulk with genotype concordance in all three patients
# Restrict to the three cell-number cut-offs 10, 50 and 90.
SC_location_counts <- SC.Cell.Num.Location[
  SC.Cell.Num.Location$Cutoff %in% c("SC - 10", "SC - 50", "SC - 90"),
]

# One X..TB.overlap.with.zygosity.match plot per patient. The constant fill
# mapped in aes() is resolved to "grey" by the manual fill scale; the outline
# colour distinguishes Method (black / red). No y-axis limits are set.

# NA19098
SC_loc_098 <- SC_location_counts[SC_location_counts$Patient %in% "NA19098", ]
ggplot(
  data = SC_loc_098,
  mapping = aes(
    x = paste(Location, Cutoff), y = X..TB.overlap.with.zygosity.match,
    fill = "grey", colour = Method
  )
) +
  geom_boxplot() +
  labs(y = "Percentage of Overlapped SNPs\nWith Same genotype concordance") +
  scale_fill_manual(values = "grey") +
  scale_colour_manual(values = c("black", "red"))

# NA19101
SC_loc_101 <- SC_location_counts[SC_location_counts$Patient %in% "NA19101", ]
ggplot(
  data = SC_loc_101,
  mapping = aes(
    x = paste(Location, Cutoff), y = X..TB.overlap.with.zygosity.match,
    fill = "grey", colour = Method
  )
) +
  geom_boxplot() +
  labs(y = "Percentage of Overlapped SNPs\nWith Same genotype concordance") +
  scale_fill_manual(values = "grey") +
  scale_colour_manual(values = c("black", "red"))

# NA19239
SC_loc_239 <- SC_location_counts[SC_location_counts$Patient %in% "NA19239", ]
ggplot(
  data = SC_loc_239,
  mapping = aes(
    x = paste(Location, Cutoff), y = X..TB.overlap.with.zygosity.match,
    fill = "grey", colour = Method
  )
) +
  geom_boxplot() +
  labs(y = "Percentage of Overlapped SNPs\nWith Same genotype concordance") +
  scale_fill_manual(values = "grey") +
  scale_colour_manual(values = c("black", "red"))
Overlap between Single Cell SNPs (cell num cutoff) and Pseudo Bulk in all three patients
# Restrict to the three cell-number cut-offs 10, 50 and 90.
SC_location_counts <- SC.Cell.Num.Location[
  SC.Cell.Num.Location$Cutoff %in% c("SC - 10", "SC - 50", "SC - 90"),
]

# One PB.Jaccard.Index plot per patient on a fixed 0-1 y-axis. The constant
# fill mapped in aes() is resolved to "grey" by the manual fill scale; the
# outline colour distinguishes Method (black / red).

# NA19098
SC_loc_098 <- SC_location_counts[SC_location_counts$Patient %in% "NA19098", ]
ggplot(
  data = SC_loc_098,
  mapping = aes(
    x = paste(Location, Cutoff), y = PB.Jaccard.Index,
    fill = "grey", colour = Method
  )
) +
  geom_boxplot() +
  labs(y = "Jaccard Index") +
  scale_fill_manual(values = "grey") +
  scale_colour_manual(values = c("black", "red")) +
  ylim(0, 1)

# NA19101
SC_loc_101 <- SC_location_counts[SC_location_counts$Patient %in% "NA19101", ]
ggplot(
  data = SC_loc_101,
  mapping = aes(
    x = paste(Location, Cutoff), y = PB.Jaccard.Index,
    fill = "grey", colour = Method
  )
) +
  geom_boxplot() +
  labs(y = "Jaccard Index") +
  scale_fill_manual(values = "grey") +
  scale_colour_manual(values = c("black", "red")) +
  ylim(0, 1)

# NA19239
SC_loc_239 <- SC_location_counts[SC_location_counts$Patient %in% "NA19239", ]
ggplot(
  data = SC_loc_239,
  mapping = aes(
    x = paste(Location, Cutoff), y = PB.Jaccard.Index,
    fill = "grey", colour = Method
  )
) +
  geom_boxplot() +
  labs(y = "Jaccard Index") +
  scale_fill_manual(values = "grey") +
  scale_colour_manual(values = c("black", "red")) +
  ylim(0, 1)
Overlap between Single Cell SNPs (cell num cutoff) and Pseudo Bulk with genotype concordance in all three patients
# Restrict to the three cell-number cut-offs 10, 50 and 90.
SC_location_counts <- SC.Cell.Num.Location[
  SC.Cell.Num.Location$Cutoff %in% c("SC - 10", "SC - 50", "SC - 90"),
]

# One X..PB.overlap.with.zygosity.match plot per patient. The constant fill
# mapped in aes() is resolved to "grey" by the manual fill scale; the outline
# colour distinguishes Method (black / red). No y-axis limits are set.

# NA19098
SC_loc_098 <- SC_location_counts[SC_location_counts$Patient %in% "NA19098", ]
ggplot(
  data = SC_loc_098,
  mapping = aes(
    x = paste(Location, Cutoff), y = X..PB.overlap.with.zygosity.match,
    fill = "grey", colour = Method
  )
) +
  geom_boxplot() +
  labs(y = "Percentage of Overlapped SNPs\nWith Same genotype concordance") +
  scale_fill_manual(values = "grey") +
  scale_colour_manual(values = c("black", "red"))

# NA19101
SC_loc_101 <- SC_location_counts[SC_location_counts$Patient %in% "NA19101", ]
ggplot(
  data = SC_loc_101,
  mapping = aes(
    x = paste(Location, Cutoff), y = X..PB.overlap.with.zygosity.match,
    fill = "grey", colour = Method
  )
) +
  geom_boxplot() +
  labs(y = "Percentage of Overlapped SNPs\nWith Same genotype concordance") +
  scale_fill_manual(values = "grey") +
  scale_colour_manual(values = c("black", "red"))

# NA19239
SC_loc_239 <- SC_location_counts[SC_location_counts$Patient %in% "NA19239", ]
ggplot(
  data = SC_loc_239,
  mapping = aes(
    x = paste(Location, Cutoff), y = X..PB.overlap.with.zygosity.match,
    fill = "grey", colour = Method
  )
) +
  geom_boxplot() +
  labs(y = "Percentage of Overlapped SNPs\nWith Same genotype concordance") +
  scale_fill_manual(values = "grey") +
  scale_colour_manual(values = c("black", "red"))
Overlap of the Single cell SNPs (cell number cutoff) and dbSNP146
# Restrict to the three cell-number cut-offs 10, 50 and 90.
SC_location_counts <- SC.Cell.Num.Location[
  SC.Cell.Num.Location$Cutoff %in% c("SC - 10", "SC - 50", "SC - 90"),
]

# One X..dbSNP.overlap.of.total plot per patient on a fixed 0-100 y-axis.
# The constant fill mapped in aes() is resolved to "grey" by the manual fill
# scale; the outline colour distinguishes Method (black / red).

# NA19098
SC_loc_098 <- SC_location_counts[SC_location_counts$Patient %in% "NA19098", ]
ggplot(
  data = SC_loc_098,
  mapping = aes(
    x = paste(Location, Cutoff), y = X..dbSNP.overlap.of.total,
    fill = "grey", colour = Method
  )
) +
  geom_boxplot() +
  labs(y = "Percentage of Overlaped \nSNPs with dbSNP 146") +
  scale_fill_manual(values = "grey") +
  scale_colour_manual(values = c("black", "red")) +
  ylim(0, 100)

# NA19101
SC_loc_101 <- SC_location_counts[SC_location_counts$Patient %in% "NA19101", ]
ggplot(
  data = SC_loc_101,
  mapping = aes(
    x = paste(Location, Cutoff), y = X..dbSNP.overlap.of.total,
    fill = "grey", colour = Method
  )
) +
  geom_boxplot() +
  labs(y = "Percentage of Overlaped \nSNPs with dbSNP 146") +
  scale_fill_manual(values = "grey") +
  scale_colour_manual(values = c("black", "red")) +
  ylim(0, 100)

# NA19239
SC_loc_239 <- SC_location_counts[SC_location_counts$Patient %in% "NA19239", ]
ggplot(
  data = SC_loc_239,
  mapping = aes(
    x = paste(Location, Cutoff), y = X..dbSNP.overlap.of.total,
    fill = "grey", colour = Method
  )
) +
  geom_boxplot() +
  labs(y = "Percentage of Overlaped \nSNPs with dbSNP 146") +
  scale_fill_manual(values = "grey") +
  scale_colour_manual(values = c("black", "red")) +
  ylim(0, 100)
Overlap of the Single cell SNPs (cell number cutoff) and 1000 Genome Project (phase 1)
# Restrict to the three cell-number cut-offs 10, 50 and 90.
SC_location_counts <- SC.Cell.Num.Location[
  SC.Cell.Num.Location$Cutoff %in% c("SC - 10", "SC - 50", "SC - 90"),
]

# One X..1000GP.overlap.of.total plot per patient on a fixed 0-100 y-axis.
# The constant fill mapped in aes() is resolved to "grey" by the manual fill
# scale; the outline colour distinguishes Method (black / red).

# NA19098
SC_loc_098 <- SC_location_counts[SC_location_counts$Patient %in% "NA19098", ]
ggplot(
  data = SC_loc_098,
  mapping = aes(
    x = paste(Location, Cutoff), y = X..1000GP.overlap.of.total,
    fill = "grey", colour = Method
  )
) +
  geom_boxplot() +
  labs(y = "Percentage of Overlapped SNPs\nWith 1000 Genome Project (phase 1)") +
  scale_fill_manual(values = "grey") +
  scale_colour_manual(values = c("black", "red")) +
  ylim(0, 100)

# NA19101
SC_loc_101 <- SC_location_counts[SC_location_counts$Patient %in% "NA19101", ]
ggplot(
  data = SC_loc_101,
  mapping = aes(
    x = paste(Location, Cutoff), y = X..1000GP.overlap.of.total,
    fill = "grey", colour = Method
  )
) +
  geom_boxplot() +
  labs(y = "Percentage of Overlapped SNPs\nWith 1000 Genome Project (phase 1)") +
  scale_fill_manual(values = "grey") +
  scale_colour_manual(values = c("black", "red")) +
  ylim(0, 100)

# NA19239
SC_loc_239 <- SC_location_counts[SC_location_counts$Patient %in% "NA19239", ]
ggplot(
  data = SC_loc_239,
  mapping = aes(
    x = paste(Location, Cutoff), y = X..1000GP.overlap.of.total,
    fill = "grey", colour = Method
  )
) +
  geom_boxplot() +
  labs(y = "Percentage of Overlapped SNPs\nWith 1000 Genome Project (phase 1)") +
  scale_fill_manual(values = "grey") +
  scale_colour_manual(values = c("black", "red")) +
  ylim(0, 100)
Overlap of the Single cell SNPs (cell number cutoff) and the Gold Standard as well as matching the genotype concordance and alleles for each SNP call
# Restrict to the three cell-number cut-offs 10, 50 and 90.
SC_location_counts <- SC.Cell.Num.Location[
  SC.Cell.Num.Location$Cutoff %in% c("SC - 10", "SC - 50", "SC - 90"),
]

# One X..gs.of.total plot per patient on a fixed 0-100 y-axis. The constant
# fill mapped in aes() is resolved to "grey" by the manual fill scale; the
# outline colour distinguishes Method (black / red).

# NA19098
SC_loc_098 <- SC_location_counts[SC_location_counts$Patient %in% "NA19098", ]
ggplot(
  data = SC_loc_098,
  mapping = aes(
    x = paste(Location, Cutoff), y = X..gs.of.total,
    fill = "grey", colour = Method
  )
) +
  geom_boxplot() +
  labs(y = "Percentage of Overlaped \nSNPs with Gold Standard") +
  scale_fill_manual(values = "grey") +
  scale_colour_manual(values = c("black", "red")) +
  ylim(0, 100)

# NA19101
SC_loc_101 <- SC_location_counts[SC_location_counts$Patient %in% "NA19101", ]
ggplot(
  data = SC_loc_101,
  mapping = aes(
    x = paste(Location, Cutoff), y = X..gs.of.total,
    fill = "grey", colour = Method
  )
) +
  geom_boxplot() +
  labs(y = "Percentage of Overlaped \nSNPs with Gold Standard") +
  scale_fill_manual(values = "grey") +
  scale_colour_manual(values = c("black", "red")) +
  ylim(0, 100)

# NA19239. The third plot previously re-selected NA19098, which drew that
# patient twice and never showed NA19239; every sibling section uses the
# 098 / 101 / 239 sequence.
SC_loc_239 <- SC_location_counts[SC_location_counts$Patient %in% "NA19239", ]
ggplot(
  data = SC_loc_239,
  mapping = aes(
    x = paste(Location, Cutoff), y = X..gs.of.total,
    fill = "grey", colour = Method
  )
) +
  geom_boxplot() +
  labs(y = "Percentage of Overlaped \nSNPs with Gold Standard") +
  scale_fill_manual(values = "grey") +
  scale_colour_manual(values = c("black", "red")) +
  ylim(0, 100)
Overlap of the Single cell SNPs (cell number cutoff) and the Gold Standard with genotype concordance
# Restrict to the three cell-number cut-offs 10, 50 and 90.
SC_location_counts <- SC.Cell.Num.Location[
  SC.Cell.Num.Location$Cutoff %in% c("SC - 10", "SC - 50", "SC - 90"),
]

# NA19098: X..gs.zyg.of.gs per "<Location> <Cutoff>" on a fixed 0-100
# y-axis. The constant fill mapped in aes() is resolved to "grey" by the
# manual fill scale; the outline colour distinguishes Method (black / red).
SC_loc_098 <- SC_location_counts[SC_location_counts$Patient %in% "NA19098", ]
ggplot(
  data = SC_loc_098,
  mapping = aes(
    x = paste(Location, Cutoff), y = X..gs.zyg.of.gs,
    fill = "grey", colour = Method
  )
) +
  geom_boxplot() +
  labs(y = "Percentage of Overlapped SNPs\nWith the same genotype concordance") +
  scale_fill_manual(values = "grey") +
  scale_colour_manual(values = c("black", "red")) +
  ylim(0, 100)
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
# Both plots read SC_location_counts as filtered by the statement above
# (cell-number cut-offs 10, 50 and 90) and draw X..gs.zyg.of.gs per
# "<Location> <Cutoff>" on a fixed 0-100 y-axis. The constant fill mapped in
# aes() is resolved to "grey" by the manual fill scale; the outline colour
# distinguishes Method (black / red).

# NA19101
SC_loc_101 <- SC_location_counts[SC_location_counts$Patient %in% "NA19101", ]
ggplot(
  data = SC_loc_101,
  mapping = aes(
    x = paste(Location, Cutoff), y = X..gs.zyg.of.gs,
    fill = "grey", colour = Method
  )
) +
  geom_boxplot() +
  labs(y = "Percentage of Overlapped SNPs\nWith the same genotype concordance") +
  scale_fill_manual(values = "grey") +
  scale_colour_manual(values = c("black", "red")) +
  ylim(0, 100)

# NA19239. The last plot previously re-selected NA19098, which repeated that
# patient's figure and never showed NA19239; every sibling section uses the
# 098 / 101 / 239 sequence.
SC_loc_239 <- SC_location_counts[SC_location_counts$Patient %in% "NA19239", ]
ggplot(
  data = SC_loc_239,
  mapping = aes(
    x = paste(Location, Cutoff), y = X..gs.zyg.of.gs,
    fill = "grey", colour = Method
  )
) +
  geom_boxplot() +
  labs(y = "Percentage of Overlapped SNPs\nWith the same genotype concordance") +
  scale_fill_manual(values = "grey") +
  scale_colour_manual(values = c("black", "red")) +
  ylim(0, 100)
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).